# coding=utf8

import pandas as pd


class Preliminary:
    """Console demos of ``DataFrame.duplicated`` and ``DataFrame.drop_duplicates``.

    Each demo prints a REPL-style ``>>>`` line followed by the result of the
    corresponding call on the shared sample frame ``df``.
    """

    # Sample frame: rows "x" and "y" hold identical values in every column,
    # and all three rows hold identical values in columns "b" and "c".
    df = pd.DataFrame(
        {"a": [20, 20, 30],
         "b": [50, 50, 50],
         "c": [50, 50, 50]},
        index=list("xyz")
    )

    @staticmethod
    def df_duplicated():
        """Print the sample frame and several ``duplicated()`` variants.

        Shows the default call, ``keep=False``, and ``keep=False`` restricted
        to the column subset ``['b', 'c']``.
        """
        df = Preliminary.df
        print(
            ">>> df\n"
            f"{df}\n"
            ">>> df.duplicated()\n"
            f"{df.duplicated()}\n"
            ">>> df.duplicated(keep=False)\n"
            f"{df.duplicated(keep=False)}\n"
            ">>> df.duplicated(subset=['b','c'], keep=False)\n"
            f"{df.duplicated(subset=['b','c'], keep=False)}\n"
        )

    @staticmethod
    def df_drop_duplicates():
        """Print the sample frame and several ``drop_duplicates()`` variants.

        Every echoed ``>>>`` line states exactly the call whose result is
        printed beneath it. None of the calls uses ``inplace``, so the shared
        sample frame is left unmodified.
        """
        df = Preliminary.df
        print(
            f">>> df\n{df}\n"
            ">>> df.drop_duplicates()\n"
            f"{df.drop_duplicates()}\n"
            ">>> df.drop_duplicates(subset=['b', 'c'], keep='last')\n"
            f"{df.drop_duplicates(subset=['b', 'c'], keep='last')}\n"
            # Drop duplicates keeping the first row of each duplicate group;
            # the original index labels are retained (no re-indexing).
            ">>> df.drop_duplicates(keep='first', ignore_index=False)\n"
            f"{df.drop_duplicates(keep='first', ignore_index=False)}\n"
            # Same, but keeping the last row of each duplicate group.
            ">>> df.drop_duplicates(keep='last', ignore_index=False)\n"
            f"{df.drop_duplicates(keep='last', ignore_index=False)}\n"
            "删除所有重复数据：\n"
            ">>> df.drop_duplicates(keep=False, ignore_index=True)\n"
            f"{df.drop_duplicates(keep=False, ignore_index=True)}\n"
        )


def task():
    """Load ``stu93.csv``, report duplicate rows, then drop them in place.

    Reads ``stu93.csv`` from the current working directory using its first
    column as the index, prints the frame and the ``duplicated(keep=False)``
    mask, removes duplicate rows (keeping the first of each group and
    renumbering the index) and prints the resulting frame.
    """
    df = pd.read_csv("stu93.csv", index_col=0)
    print(
        "# 使用read_csv读出文件数据\n"
        " >>> df = pd.read_csv('stu93.csv', index_col=0)\n"
        "# 读出数据结果：\n"
        f"{df}\n"
        ">>> df.duplicated(keep=False)\n"
        "检测重复数据结果：\n"
        f"{df.duplicated(keep=False)}"
    )

    # With inplace=True the call returns None, so it must run as a statement;
    # interpolating it into the output would print a stray "None" line.
    df.drop_duplicates(keep="first", ignore_index=True, inplace=True)
    print(
        "删除重复数据后结果：\n"
        ">>> df.drop_duplicates(keep='first', ignore_index=True, inplace=True)\n"
        f"{df}"
    )


def training1():
    """De-duplicate ``stu93.csv`` on ``age``/``yw`` and save the result.

    Reads ``stu93.csv`` from the current working directory using its first
    column as the index, prints the frame and the duplicate mask computed on
    the ``['age', 'yw']`` columns, drops duplicates in place (keeping the
    last row of each group and the original index labels), prints the
    resulting frame and writes it to ``stu93nodup.csv``.
    """
    key_columns = ["age", "yw"]
    df = pd.read_csv("stu93.csv", index_col=0)
    print(
        "# 使用read_csv读出文件数据\n"
        " >>> df = pd.read_csv('stu93.csv', index_col=0)\n"
        "# 读出数据结果：\n"
        f"{df}\n"
        ">>> df.duplicated(subset=['age', 'yw'], keep=False)\n"
        "检测重复数据结果：\n"
        f"{df.duplicated(subset=key_columns, keep=False)}"
    )

    # inplace=True makes the call return None, so run it as a statement
    # instead of interpolating it (which would print "None").
    df.drop_duplicates(
        subset=key_columns, keep="last", ignore_index=False, inplace=True
    )
    print(
        "删除重复数据后结果：\n"
        ">>> df.drop_duplicates(subset=['age', 'yw'], keep='last', "
        "ignore_index=False, inplace=True)\n"
        f"{df}"
    )

    # to_csv() with a path also returns None; keep the file write explicit.
    df.to_csv("stu93nodup.csv")
    print(">>> df.to_csv('stu93nodup.csv')")


if __name__ == "__main__":
    # Only the duplicated() demo runs by default. Uncomment one of the calls
    # below to run another demo; task() and training1() read "stu93.csv"
    # from the current working directory.
    Preliminary.df_duplicated()
    # Preliminary.df_drop_duplicates()
    # task()
    # training1()
